{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# CSV of annotation records; it is read with pd.read_csv(file) in the next cell.\n",
    "# The default is the original hard-coded local Windows path. Setting the\n",
    "# LABEL_CSV_PATH environment variable overrides it, so the notebook can be\n",
    "# re-run on another machine without editing this cell.\n",
    "file = os.environ.get(\n",
    "    \"LABEL_CSV_PATH\",\n",
    "    r\"D:\\工作相关内容\\公司项目\\禁限运危化品货源\\标注数据\\20220216.csv\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>example_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>text</th>\n",
       "      <th>text.1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>240507</td>\n",
       "      <td>261676</td>\n",
       "      <td>39</td>\n",
       "      <td>S:我是装什么地方千五？\\nD:装的。\\nD:$$垃圾$$是装什么东西？伙计说。\\nS:你想...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>240554</td>\n",
       "      <td>260449</td>\n",
       "      <td>39</td>\n",
       "      <td>D:杭州余杭到内蒙古鄂尔多斯的货多少钱？\\nD:他不是危险品嘛。\\nS:啊啊，归类里面规定是...</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>240573</td>\n",
       "      <td>261680</td>\n",
       "      <td>39</td>\n",
       "      <td>S:那个赤峰的。\\nD:四房的多少钱？\\nS:顺丰的评选$$脂肪$$120杜尔基装。\\nD:...</td>\n",
       "      <td>不确定</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>240549</td>\n",
       "      <td>261008</td>\n",
       "      <td>38</td>\n",
       "      <td>S:啊，好。\\nS:啊啊。\\nD:啊，南京架子$$老虎$$啊。\\nS:啊。\\nD:好好嘞拉不。\\n</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>240560</td>\n",
       "      <td>261679</td>\n",
       "      <td>39</td>\n",
       "      <td>D:装什么配件货什么货？\\nS:拉的是袋装活性炭，到灵宝长春卸的啊。\\nD:在哪装$$活性炭...</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4805</th>\n",
       "      <td>343919</td>\n",
       "      <td>260982</td>\n",
       "      <td>60</td>\n",
       "      <td>D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4806</th>\n",
       "      <td>343920</td>\n",
       "      <td>260981</td>\n",
       "      <td>60</td>\n",
       "      <td>D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4807</th>\n",
       "      <td>343921</td>\n",
       "      <td>260980</td>\n",
       "      <td>60</td>\n",
       "      <td>S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4808</th>\n",
       "      <td>343922</td>\n",
       "      <td>260979</td>\n",
       "      <td>60</td>\n",
       "      <td>D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4809</th>\n",
       "      <td>343923</td>\n",
       "      <td>260978</td>\n",
       "      <td>60</td>\n",
       "      <td>D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4810 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          id  example_id  user_id  \\\n",
       "0     240507      261676       39   \n",
       "1     240554      260449       39   \n",
       "2     240573      261680       39   \n",
       "3     240549      261008       38   \n",
       "4     240560      261679       39   \n",
       "...      ...         ...      ...   \n",
       "4805  343919      260982       60   \n",
       "4806  343920      260981       60   \n",
       "4807  343921      260980       60   \n",
       "4808  343922      260979       60   \n",
       "4809  343923      260978       60   \n",
       "\n",
       "                                                   text text.1  \n",
       "0     S:我是装什么地方千五？\\nD:装的。\\nD:$$垃圾$$是装什么东西？伙计说。\\nS:你想...     不是  \n",
       "1     D:杭州余杭到内蒙古鄂尔多斯的货多少钱？\\nD:他不是危险品嘛。\\nS:啊啊，归类里面规定是...      是  \n",
       "2     S:那个赤峰的。\\nD:四房的多少钱？\\nS:顺丰的评选$$脂肪$$120杜尔基装。\\nD:...    不确定  \n",
       "3     S:啊，好。\\nS:啊啊。\\nD:啊，南京架子$$老虎$$啊。\\nS:啊。\\nD:好好嘞拉不。\\n     不是  \n",
       "4     D:装什么配件货什么货？\\nS:拉的是袋装活性炭，到灵宝长春卸的啊。\\nD:在哪装$$活性炭...      是  \n",
       "...                                                 ...    ...  \n",
       "4805  D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...     不是  \n",
       "4806  D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...     不是  \n",
       "4807  S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...     不是  \n",
       "4808  D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...     不是  \n",
       "4809  D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...     不是  \n",
       "\n",
       "[4810 rows x 5 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the annotation CSV into a DataFrame and show it as the cell output.\n",
    "# In the stored output the columns are id, example_id, user_id, text and text.1;\n",
    "# text.1 holds the label values (presumably pandas' automatic name for a second\n",
    "# header called `text` - confirm against the CSV).\n",
    "df = pd.read_csv(filepath_or_buffer=file)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([39, 38, 55, 44, 37, 54, 60], dtype=int64)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct user_id values present in the data, in order of first appearance.\n",
    "users = df.loc[:, \"user_id\"].unique()\n",
    "users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>example_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>text</th>\n",
       "      <th>text.1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3309</th>\n",
       "      <td>318384</td>\n",
       "      <td>259559</td>\n",
       "      <td>60</td>\n",
       "      <td>S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...</td>\n",
       "      <td>不确定</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3310</th>\n",
       "      <td>318385</td>\n",
       "      <td>259560</td>\n",
       "      <td>60</td>\n",
       "      <td>D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3311</th>\n",
       "      <td>318386</td>\n",
       "      <td>259561</td>\n",
       "      <td>60</td>\n",
       "      <td>S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3312</th>\n",
       "      <td>318388</td>\n",
       "      <td>259562</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3313</th>\n",
       "      <td>318389</td>\n",
       "      <td>259563</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4805</th>\n",
       "      <td>343919</td>\n",
       "      <td>260982</td>\n",
       "      <td>60</td>\n",
       "      <td>D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4806</th>\n",
       "      <td>343920</td>\n",
       "      <td>260981</td>\n",
       "      <td>60</td>\n",
       "      <td>D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4807</th>\n",
       "      <td>343921</td>\n",
       "      <td>260980</td>\n",
       "      <td>60</td>\n",
       "      <td>S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4808</th>\n",
       "      <td>343922</td>\n",
       "      <td>260979</td>\n",
       "      <td>60</td>\n",
       "      <td>D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4809</th>\n",
       "      <td>343923</td>\n",
       "      <td>260978</td>\n",
       "      <td>60</td>\n",
       "      <td>D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1501 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          id  example_id  user_id  \\\n",
       "3309  318384      259559       60   \n",
       "3310  318385      259560       60   \n",
       "3311  318386      259561       60   \n",
       "3312  318388      259562       60   \n",
       "3313  318389      259563       60   \n",
       "...      ...         ...      ...   \n",
       "4805  343919      260982       60   \n",
       "4806  343920      260981       60   \n",
       "4807  343921      260980       60   \n",
       "4808  343922      260979       60   \n",
       "4809  343923      260978       60   \n",
       "\n",
       "                                                   text text.1  \n",
       "3309  S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...    不确定  \n",
       "3310  D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...     不是  \n",
       "3311  S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...     不是  \n",
       "3312  S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...      是  \n",
       "3313  S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...     不是  \n",
       "...                                                 ...    ...  \n",
       "4805  D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...     不是  \n",
       "4806  D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...     不是  \n",
       "4807  S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...     不是  \n",
       "4808  D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...     不是  \n",
       "4809  D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...     不是  \n",
       "\n",
       "[1501 rows x 5 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows labelled by user_id 60. Their label column is renamed to 运营\n",
    "# (operations) further down, so these are the operations-side labels.\n",
    "df_yunying = df.loc[df[\"user_id\"].eq(60)]\n",
    "df_yunying"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>text</th>\n",
       "      <th>text.1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>D:$$垃圾$$，喂喂。\\nS:对。\\nS:哎。\\n</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>D:100块钱的米。\\nS:嗯。\\nD:惠州$$垃圾$$。\\nS:垃圾。\\nD:拉。\\n</td>\n",
       "      <td>不确定</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D:100路。\\nD:啊，对，你车吗？\\nS:价格也不算低，加不缺这个你的一个地方发$$脂肪...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D:1029。\\nS:哎。\\nD:宜宾$$香烟$$。\\nS:对对对。\\nD:好嘞。\\n</td>\n",
       "      <td>不确定</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>D:10吨货950块钱。\\nS:之前是800还是900吧，我记得。\\nD:那以前不是没有一千...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2466</th>\n",
       "      <td>S:高度不超的，还是到地方装128吨啊？\\nS:这五天用的，以前都是想着装好的总机了。\\nS...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2467</th>\n",
       "      <td>S:高栏。\\nS:不知道什么什么，什么是微度，这么说，没少。\\nS:要求交少交少了$$垃圾$...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2468</th>\n",
       "      <td>S:魅蓝metal大货不大，货大货不是小货大货。\\nD:那万一少了那么多湾小区要运费。\\nS...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2469</th>\n",
       "      <td>S:麻烦给800。\\nD:啊。\\nD:赖是赖是在南五名$$垃圾$$吗？在哪里呀？\\nS:就是...</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2470</th>\n",
       "      <td>S:龙卡。\\nD:啊。\\nS:$$木炭$$问问看。\\nD:啊木炭呢。\\nS:嗯。\\n</td>\n",
       "      <td>不确定</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2471 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   text text.1\n",
       "0                            D:$$垃圾$$，喂喂。\\nS:对。\\nS:哎。\\n     不是\n",
       "1          D:100块钱的米。\\nS:嗯。\\nD:惠州$$垃圾$$。\\nS:垃圾。\\nD:拉。\\n    不确定\n",
       "2     D:100路。\\nD:啊，对，你车吗？\\nS:价格也不算低，加不缺这个你的一个地方发$$脂肪...     不是\n",
       "3           D:1029。\\nS:哎。\\nD:宜宾$$香烟$$。\\nS:对对对。\\nD:好嘞。\\n    不确定\n",
       "4     D:10吨货950块钱。\\nS:之前是800还是900吧，我记得。\\nD:那以前不是没有一千...     不是\n",
       "...                                                 ...    ...\n",
       "2466  S:高度不超的，还是到地方装128吨啊？\\nS:这五天用的，以前都是想着装好的总机了。\\nS...     不是\n",
       "2467  S:高栏。\\nS:不知道什么什么，什么是微度，这么说，没少。\\nS:要求交少交少了$$垃圾$...     不是\n",
       "2468  S:魅蓝metal大货不大，货大货不是小货大货。\\nD:那万一少了那么多湾小区要运费。\\nS...     不是\n",
       "2469  S:麻烦给800。\\nD:啊。\\nD:赖是赖是在南五名$$垃圾$$吗？在哪里呀？\\nS:就是...     不是\n",
       "2470         S:龙卡。\\nD:啊。\\nS:$$木炭$$问问看。\\nD:啊木炭呢。\\nS:嗯。\\n    不确定\n",
       "\n",
       "[2471 rows x 2 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Every row NOT labelled by user_id 60; these labels end up in the\n",
    "# 算法标注 column after the merge and rename in the later cells.\n",
    "df_biaozhu = df.loc[df[\"user_id\"].ne(60)]\n",
    "def join(df):\n",
    "    \"\"\"Collapse one group's labels into a single '$$'-separated string.\n",
    "\n",
    "    Args:\n",
    "        df: despite the name, the pandas Series of `text.1` labels that\n",
    "            `.apply(join)` on the grouped column passes in for one `text` group.\n",
    "\n",
    "    Returns:\n",
    "        The distinct labels joined by '$$' in sorted order. Missing labels\n",
    "        are ignored, so a group without any label gives ''.\n",
    "    \"\"\"\n",
    "    # Sorting matters: iterating a bare set of strings can yield a different\n",
    "    # order from one kernel session to the next (string hashing is randomised),\n",
    "    # which made combined values such as 'A$$B' / 'B$$A' non-reproducible.\n",
    "    labels = {str(label) for label in df.dropna().values}\n",
    "    return \"$$\".join(sorted(labels))\n",
    "# One row per distinct dialogue text, with the labels from all non-60 users\n",
    "# combined into a single string by join(). reset_index() turns the grouped\n",
    "# Series back into a two-column frame (text, text.1).\n",
    "df_all = (\n",
    "    df_biaozhu.groupby(['text'])[\"text.1\"]\n",
    "    .apply(join)\n",
    "    .reset_index()\n",
    ")\n",
    "df_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>example_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>text</th>\n",
       "      <th>text.1_x</th>\n",
       "      <th>text.1_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>318384</td>\n",
       "      <td>259559</td>\n",
       "      <td>60</td>\n",
       "      <td>S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...</td>\n",
       "      <td>不确定</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>318385</td>\n",
       "      <td>259560</td>\n",
       "      <td>60</td>\n",
       "      <td>D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不确定</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>318386</td>\n",
       "      <td>259561</td>\n",
       "      <td>60</td>\n",
       "      <td>S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不确定$$是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>318388</td>\n",
       "      <td>259562</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...</td>\n",
       "      <td>是</td>\n",
       "      <td>不确定$$不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>318389</td>\n",
       "      <td>259563</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1496</th>\n",
       "      <td>343919</td>\n",
       "      <td>260982</td>\n",
       "      <td>60</td>\n",
       "      <td>D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1497</th>\n",
       "      <td>343920</td>\n",
       "      <td>260981</td>\n",
       "      <td>60</td>\n",
       "      <td>D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1498</th>\n",
       "      <td>343921</td>\n",
       "      <td>260980</td>\n",
       "      <td>60</td>\n",
       "      <td>S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1499</th>\n",
       "      <td>343922</td>\n",
       "      <td>260979</td>\n",
       "      <td>60</td>\n",
       "      <td>D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1500</th>\n",
       "      <td>343923</td>\n",
       "      <td>260978</td>\n",
       "      <td>60</td>\n",
       "      <td>D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1501 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          id  example_id  user_id  \\\n",
       "0     318384      259559       60   \n",
       "1     318385      259560       60   \n",
       "2     318386      259561       60   \n",
       "3     318388      259562       60   \n",
       "4     318389      259563       60   \n",
       "...      ...         ...      ...   \n",
       "1496  343919      260982       60   \n",
       "1497  343920      260981       60   \n",
       "1498  343921      260980       60   \n",
       "1499  343922      260979       60   \n",
       "1500  343923      260978       60   \n",
       "\n",
       "                                                   text text.1_x text.1_y  \n",
       "0     S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...      不确定       不是  \n",
       "1     D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...       不是      不确定  \n",
       "2     S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...       不是   不确定$$是  \n",
       "3     S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...        是  不确定$$不是  \n",
       "4     S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...       不是        是  \n",
       "...                                                 ...      ...      ...  \n",
       "1496  D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...       不是       不是  \n",
       "1497  D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...       不是       不是  \n",
       "1498  S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...       不是        是  \n",
       "1499  D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...       不是        是  \n",
       "1500  D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...       不是       不是  \n",
       "\n",
       "[1501 rows x 6 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Attach the combined labels to each user-60 row by matching on the dialogue text.\n",
    "# Both inputs have a `text.1` column, so the merged frame carries text.1_x\n",
    "# (from df_yunying) and text.1_y (from df_all). It is a left join: user-60 rows\n",
    "# without a matching text are kept, with a missing value in text.1_y.\n",
    "result = df_yunying.merge(df_all, how=\"left\", on=\"text\")\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>example_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>对话文本</th>\n",
       "      <th>运营</th>\n",
       "      <th>算法标注</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>318384</td>\n",
       "      <td>259559</td>\n",
       "      <td>60</td>\n",
       "      <td>S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...</td>\n",
       "      <td>不确定</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>318385</td>\n",
       "      <td>259560</td>\n",
       "      <td>60</td>\n",
       "      <td>D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不确定</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>318386</td>\n",
       "      <td>259561</td>\n",
       "      <td>60</td>\n",
       "      <td>S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不确定$$是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>318388</td>\n",
       "      <td>259562</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...</td>\n",
       "      <td>是</td>\n",
       "      <td>不确定$$不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>318389</td>\n",
       "      <td>259563</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1496</th>\n",
       "      <td>343919</td>\n",
       "      <td>260982</td>\n",
       "      <td>60</td>\n",
       "      <td>D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1497</th>\n",
       "      <td>343920</td>\n",
       "      <td>260981</td>\n",
       "      <td>60</td>\n",
       "      <td>D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1498</th>\n",
       "      <td>343921</td>\n",
       "      <td>260980</td>\n",
       "      <td>60</td>\n",
       "      <td>S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1499</th>\n",
       "      <td>343922</td>\n",
       "      <td>260979</td>\n",
       "      <td>60</td>\n",
       "      <td>D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1500</th>\n",
       "      <td>343923</td>\n",
       "      <td>260978</td>\n",
       "      <td>60</td>\n",
       "      <td>D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1501 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          id  example_id  user_id  \\\n",
       "0     318384      259559       60   \n",
       "1     318385      259560       60   \n",
       "2     318386      259561       60   \n",
       "3     318388      259562       60   \n",
       "4     318389      259563       60   \n",
       "...      ...         ...      ...   \n",
       "1496  343919      260982       60   \n",
       "1497  343920      260981       60   \n",
       "1498  343921      260980       60   \n",
       "1499  343922      260979       60   \n",
       "1500  343923      260978       60   \n",
       "\n",
       "                                                   对话文本   运营     算法标注  \n",
       "0     S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...  不确定       不是  \n",
       "1     D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...   不是      不确定  \n",
       "2     S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...   不是   不确定$$是  \n",
       "3     S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...    是  不确定$$不是  \n",
       "4     S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...   不是        是  \n",
       "...                                                 ...  ...      ...  \n",
       "1496  D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...   不是       不是  \n",
       "1497  D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...   不是       不是  \n",
       "1498  S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...   不是        是  \n",
       "1499  D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...   不是        是  \n",
       "1500  D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...   不是       不是  \n",
       "\n",
       "[1501 rows x 6 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Give the merged columns readable Chinese headers:\n",
    "#   text     -> 对话文本 (dialogue text)\n",
    "#   text.1_x -> 运营     (operations label, from the user-60 rows)\n",
    "#   text.1_y -> 算法标注 (algorithm annotation, combined labels of the other users)\n",
    "result = result.rename(\n",
    "    columns={\n",
    "        \"text\": \"对话文本\",\n",
    "        \"text.1_x\": \"运营\",\n",
    "        \"text.1_y\": \"算法标注\",\n",
    "    }\n",
    ")\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>example_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>对话文本</th>\n",
       "      <th>运营</th>\n",
       "      <th>算法标注</th>\n",
       "      <th>check_flag</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>318384</td>\n",
       "      <td>259559</td>\n",
       "      <td>60</td>\n",
       "      <td>S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...</td>\n",
       "      <td>不确定</td>\n",
       "      <td>不是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>318385</td>\n",
       "      <td>259560</td>\n",
       "      <td>60</td>\n",
       "      <td>D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不确定</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>318386</td>\n",
       "      <td>259561</td>\n",
       "      <td>60</td>\n",
       "      <td>S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不确定$$是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>318388</td>\n",
       "      <td>259562</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...</td>\n",
       "      <td>是</td>\n",
       "      <td>不确定$$不是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>318389</td>\n",
       "      <td>259563</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1496</th>\n",
       "      <td>343919</td>\n",
       "      <td>260982</td>\n",
       "      <td>60</td>\n",
       "      <td>D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1497</th>\n",
       "      <td>343920</td>\n",
       "      <td>260981</td>\n",
       "      <td>60</td>\n",
       "      <td>D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1498</th>\n",
       "      <td>343921</td>\n",
       "      <td>260980</td>\n",
       "      <td>60</td>\n",
       "      <td>S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1499</th>\n",
       "      <td>343922</td>\n",
       "      <td>260979</td>\n",
       "      <td>60</td>\n",
       "      <td>D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1500</th>\n",
       "      <td>343923</td>\n",
       "      <td>260978</td>\n",
       "      <td>60</td>\n",
       "      <td>D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1501 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          id  example_id  user_id  \\\n",
       "0     318384      259559       60   \n",
       "1     318385      259560       60   \n",
       "2     318386      259561       60   \n",
       "3     318388      259562       60   \n",
       "4     318389      259563       60   \n",
       "...      ...         ...      ...   \n",
       "1496  343919      260982       60   \n",
       "1497  343920      260981       60   \n",
       "1498  343921      260980       60   \n",
       "1499  343922      260979       60   \n",
       "1500  343923      260978       60   \n",
       "\n",
       "                                                   对话文本   运营     算法标注  \\\n",
       "0     S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...  不确定       不是   \n",
       "1     D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...   不是      不确定   \n",
       "2     S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...   不是   不确定$$是   \n",
       "3     S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...    是  不确定$$不是   \n",
       "4     S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...   不是        是   \n",
       "...                                                 ...  ...      ...   \n",
       "1496  D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...   不是       不是   \n",
       "1497  D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...   不是       不是   \n",
       "1498  S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...   不是        是   \n",
       "1499  D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...   不是        是   \n",
       "1500  D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...   不是       不是   \n",
       "\n",
       "      check_flag  \n",
       "0              0  \n",
       "1              0  \n",
       "2              0  \n",
       "3              0  \n",
       "4              0  \n",
       "...          ...  \n",
       "1496           1  \n",
       "1497           1  \n",
       "1498           0  \n",
       "1499           0  \n",
       "1500           1  \n",
       "\n",
       "[1501 rows x 7 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def make_same_flag(item):\n",
    "    \"\"\"Return 1 when the '运营' and '算法标注' values of `item` are equal, else 0.\n",
    "\n",
    "    `item` only needs to support lookup by those two keys; this cell passes\n",
    "    one DataFrame row at a time through `result.apply(..., axis=1)`.\n",
    "    \"\"\"\n",
    "    ops_label = item['运营']\n",
    "    algo_label = item['算法标注']\n",
    "    # Plain equality: a combined label such as '不确定$$是' is not equal to '是'.\n",
    "    return 1 if ops_label == algo_label else 0\n",
    "# Row-wise pass: store 1 in check_flag where the two label columns agree, 0 where they differ.\n",
    "result['check_flag'] = result.apply(make_same_flag, axis='columns')\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    796\n",
       "0    705\n",
       "Name: check_flag, dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tally how many rows carry each check_flag value.\n",
    "result.loc[:, 'check_flag'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://mirrors.aliyun.com/pypi/simple\n",
      "Requirement already satisfied: openpyxl in d:\\users\\fengfeng.qiu\\anaconda3\\envs\\validate\\lib\\site-packages (3.0.9)\n",
      "Requirement already satisfied: et-xmlfile in d:\\users\\fengfeng.qiu\\anaconda3\\envs\\validate\\lib\\site-packages (from openpyxl) (1.1.0)\n"
     ]
    }
   ],
   "source": [
    "# Use the %pip magic rather than !pip so the package is installed into the running kernel's environment.\n",
    "%pip install openpyxl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>example_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>对话文本</th>\n",
       "      <th>运营</th>\n",
       "      <th>算法标注</th>\n",
       "      <th>check_flag</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>318384</td>\n",
       "      <td>259559</td>\n",
       "      <td>60</td>\n",
       "      <td>S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...</td>\n",
       "      <td>不确定</td>\n",
       "      <td>不是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>318385</td>\n",
       "      <td>259560</td>\n",
       "      <td>60</td>\n",
       "      <td>D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不确定</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>318386</td>\n",
       "      <td>259561</td>\n",
       "      <td>60</td>\n",
       "      <td>S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不确定$$是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>318388</td>\n",
       "      <td>259562</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...</td>\n",
       "      <td>是</td>\n",
       "      <td>不确定$$不是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>318389</td>\n",
       "      <td>259563</td>\n",
       "      <td>60</td>\n",
       "      <td>S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1496</th>\n",
       "      <td>343919</td>\n",
       "      <td>260982</td>\n",
       "      <td>60</td>\n",
       "      <td>D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1497</th>\n",
       "      <td>343920</td>\n",
       "      <td>260981</td>\n",
       "      <td>60</td>\n",
       "      <td>D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1498</th>\n",
       "      <td>343921</td>\n",
       "      <td>260980</td>\n",
       "      <td>60</td>\n",
       "      <td>S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1499</th>\n",
       "      <td>343922</td>\n",
       "      <td>260979</td>\n",
       "      <td>60</td>\n",
       "      <td>D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...</td>\n",
       "      <td>不是</td>\n",
       "      <td>是</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1500</th>\n",
       "      <td>343923</td>\n",
       "      <td>260978</td>\n",
       "      <td>60</td>\n",
       "      <td>D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...</td>\n",
       "      <td>不是</td>\n",
       "      <td>不是</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1501 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          id  example_id  user_id  \\\n",
       "0     318384      259559       60   \n",
       "1     318385      259560       60   \n",
       "2     318386      259561       60   \n",
       "3     318388      259562       60   \n",
       "4     318389      259563       60   \n",
       "...      ...         ...      ...   \n",
       "1496  343919      260982       60   \n",
       "1497  343920      260981       60   \n",
       "1498  343921      260980       60   \n",
       "1499  343922      260979       60   \n",
       "1500  343923      260978       60   \n",
       "\n",
       "                                                   对话文本   运营     算法标注  \\\n",
       "0     S:你直接你文下来，下来以后先找到这个厂，在很大一个家具厂。\\nD:是吧？\\nS:啊，这很大...  不确定       不是   \n",
       "1     D:啊啊，对。\\nS:然后。\\nS:劳$$老虎$$这边过去全家再拉回来。\\nD:啊，钱交到啊...   不是      不确定   \n",
       "2     S:咱们从屋里走的话，发信息有货要能不能跑了？你看网上有几个一起装？\\nS:平板他们搞。\\n...   不是   不确定$$是   \n",
       "3     S:对啊坑坑铁矿米多吨铁矿一个我很喜欢，只要叠起来在一吨嘞。\\nD:是是是是我拉过一回就能铁...    是  不确定$$不是   \n",
       "4     S:对。\\nD:再给整100。\\nS:我俩这个师傅亲自谅解，这个$$毒素$$都订好了，你就是...   不是        是   \n",
       "...                                                 ...  ...      ...   \n",
       "1496  D:到了评选，这个给多少钱呢？\\nS:你多大的车啊？六米八的是吧？\\nD:六米八的车$$电瓶...   不是       不是   \n",
       "1497  D:就一卷嘛，你。\\nD:山东那个白酒吗？\\nS:那我$$垃圾$$装不下。\\nD:啊，那个卷...   不是       不是   \n",
       "1498  S:3588。\\nD:3588。\\nS:你这一车$$锯末$$的，你赶紧装吗？装回来时了，车最...   不是        是   \n",
       "1499  D:提高不上。\\nS:啊。\\nD:我要给提高$$沥青$$不差不多嘛？\\nS:就是就是沥青啊。...   不是        是   \n",
       "1500  D:什么？\\nS:不如拉吧，汽车视频是吧？\\nD:啊，不是吧？不不是$$垃圾$$的，就是正正...   不是       不是   \n",
       "\n",
       "      check_flag  \n",
       "0              0  \n",
       "1              0  \n",
       "2              0  \n",
       "3              0  \n",
       "4              0  \n",
       "...          ...  \n",
       "1496           1  \n",
       "1497           1  \n",
       "1498           0  \n",
       "1499           0  \n",
       "1500           1  \n",
       "\n",
       "[1501 rows x 7 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_suanfa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
